# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here:
# that call deletes every object of whoever sources this script and still
# does not reset loaded packages or options. Run the script in a fresh R
# session instead.
library(tidyverse)

# Create the folder that stores the processed files.
# recursive = TRUE also creates any missing parent ("data", "data/processed");
# showWarnings = FALSE makes re-running the script silent when it already exists.
dir.create(here::here("data", "processed", "elections"),
           recursive = TRUE,
           showWarnings = FALSE)

# Create Elections Data -------
# Read elections data: every file under data/raw/TSE whose name matches
# "mun_elec" is read and the results are row-bound into a single data frame.
mun_elec_files <- list.files(here::here("data", "raw", "TSE"),
                             pattern = "mun_elec",
                             full.names = TRUE)

# Fail early with a clear message instead of an obscure "object not found"
# error in the next step when no input file is present.
if (length(mun_elec_files) == 0) {
  stop("No 'mun_elec' files found in ", here::here("data", "raw", "TSE"),
       call. = FALSE)
}

mun_elec_data <- map_df(mun_elec_files, read_rds)

# Harmonize party acronyms.
# composicao_legenda: drop two-digit numbers, hyphens and a trailing
# whitespace character, then all spaces and a leading "/", and upper-case.
# sigla_partido: drop spaces and upper-case. nome_candidato: upper-case.
mun_elec_data <- mun_elec_data %>%
  mutate(
    composicao_legenda = composicao_legenda %>%
      str_remove_all("[0-9]{2}|-|\\s$") %>%
      str_replace_all(" ", "") %>%
      str_replace("^\\/", "") %>%
      str_to_upper(),
    sigla_partido = str_to_upper(str_replace_all(sigla_partido, " ", "")),
    nome_candidato = str_to_upper(nome_candidato)
  )

# Inspect which election years are present
distinct(mun_elec_data, ano_eleicao)

# Exclude elections in non-election years, i.e. keep the four-year cycle
# 1996, 2000, ..., 2016 (it only excludes referendums)
mun_elec_data_cleaned <- filter(
  mun_elec_data,
  ano_eleicao %in% seq.int(from = 1996, to = 2016, by = 4)
)

# Inspect descricao_eleicao, including whether any value is missing
distinct(mun_elec_data_cleaned, descricao_eleicao)
mun_elec_data_cleaned %>%
  distinct(descricao_eleicao) %>%
  filter(is.na(descricao_eleicao))

# Check whether irregular elections are duplicated in the original dataset
# (i.e. it includes both the election declared irregular and the new election):
# list every (state, electoral unit, year) with more than one election description
mun_elec_data_cleaned %>%
  distinct(sigla_uf, sigla_ue, descricao_eleicao, ano_eleicao) %>%
  group_by(sigla_uf, sigla_ue, ano_eleicao) %>%
  summarise(n_obs = n()) %>%
  filter(n_obs != 1)

# Create an indicator for irregular elections: 0 when descricao_eleicao is one
# of the regular election labels listed below, 1 otherwise
mun_elec_data_cleaned <- mun_elec_data_cleaned %>%
  mutate(
    irregular = as.numeric(
      !(descricao_eleicao %in% c(
        "ELEICOES 1996",
        "ELEICOES 2000",
        "ELEICOES 2004",
        "ELEICOES 2008",
        "ELEICAO MUNICIPAL 2012",
        "ELEICOES MUNICIPAIS 2016"
      ))
    )
  )

# Keep the irregular rows aside; part of them is added back further down
irregular_elections <- filter(mun_elec_data_cleaned, irregular == 1)

# Exclude every (year, municipality, office) that had an irregular election.
# NOTE(review): descricao_cargo is matched case-sensitively here, and it is only
# upper-cased later in the script -- confirm the spelling is consistent within
# each year.
irregular_elections_identifiers <- distinct(
  irregular_elections,
  ano_eleicao, sigla_uf, sigla_ue, codigo_municipio, descricao_cargo
)

mun_elec_data_cleaned <- anti_join(
  mun_elec_data_cleaned,
  irregular_elections_identifiers,
  by = c("ano_eleicao", "sigla_uf", "sigla_ue", "codigo_municipio", "descricao_cargo")
)

# Check exclusions (OK): list any (state, electoral unit, year) that still has
# more than one election description
mun_elec_data_cleaned %>%
  distinct(sigla_uf, sigla_ue, descricao_eleicao, ano_eleicao) %>%
  group_by(sigla_uf, sigla_ue, ano_eleicao) %>%
  mutate(n_obs = n()) %>%
  filter(n_obs != 1)

# Check duplicates in irregular elections: (state, electoral unit, year)
# combinations with more than one irregular election description
duplicates_in_irregular <- irregular_elections %>%
  distinct(sigla_uf, sigla_ue, descricao_eleicao, ano_eleicao) %>%
  group_by(sigla_uf, sigla_ue, ano_eleicao) %>%
  mutate(n_obs = n()) %>%
  filter(n_obs != 1)

# Exclude these duplicates from the irregular elections
irregular_elections <- anti_join(
  irregular_elections,
  duplicates_in_irregular,
  by = c("sigla_uf", "sigla_ue", "ano_eleicao")
)

# Add supplementary elections (the irregular elections left after removing the
# duplicated ones) back to the cleaned data
mun_elec_data_cleaned <- mun_elec_data_cleaned %>%
  bind_rows(irregular_elections)

# Check if there are any duplicates in mayoral elections.
# descricao_cargo is only upper-cased further down in the script, so the office
# is compared case-insensitively here; an exact match on "Prefeito" would skip
# every row where the office is spelled "PREFEITO".
mun_elec_data_cleaned %>%
  filter(str_to_upper(descricao_cargo) == "PREFEITO") %>%
  distinct(sigla_uf, sigla_ue, descricao_eleicao, ano_eleicao) %>%
  group_by(sigla_uf, sigla_ue, ano_eleicao) %>%
  mutate(n_obs = n()) %>%
  filter(n_obs != 1)

# Check number of irregular elections
# (needs to be 416 = 420 (irregular) - 4 (duplicates in irregular))
mun_elec_data_cleaned %>%
  filter(irregular == 1) %>%
  distinct(ano_eleicao, sigla_ue, sigla_uf) %>%
  nrow()

# Inspect the office labels before harmonizing
distinct(mun_elec_data_cleaned, descricao_cargo)

# Harmonize descricao_cargo by upper-casing it
mun_elec_data_cleaned <- mutate(
  mun_elec_data_cleaned,
  descricao_cargo = str_to_upper(descricao_cargo)
)

# Inspect the office labels after harmonizing
distinct(mun_elec_data_cleaned, descricao_cargo)

# Check number of cities (distinct codigo_municipio) per year
mun_elec_data_cleaned %>%
  distinct(ano_eleicao, codigo_municipio) %>%
  count(ano_eleicao, name = "n_obs")

# Data for 1996 is incomplete!

# Create mayor and city-council datasets; only 2000-2016 are kept
mayor_elections <- filter(
  mun_elec_data_cleaned,
  ano_eleicao %in% c(2000, 2004, 2008, 2012, 2016) & descricao_cargo == "PREFEITO"
)

citycouncil_elections <- filter(
  mun_elec_data_cleaned,
  ano_eleicao %in% c(2000, 2004, 2008, 2012, 2016) & descricao_cargo == "VEREADOR"
)

# Save both datasets under data/processed/elections
write_rds(
  mayor_elections,
  here::here("data", "processed", "elections", "mayor_elections_2000_2016.rds")
)
write_rds(
  citycouncil_elections,
  here::here("data", "processed", "elections", "citycouncil_elections_2000_2016.rds")
)

# Create Coalitions Data -------
# Read coalitions data: every file under data/raw/TSE whose name matches
# "mun_coalitions" is read and the results are row-bound into a single data frame.
mun_coalitions_files <- list.files(here::here("data", "raw", "TSE"),
                                   pattern = "mun_coalitions",
                                   full.names = TRUE)

# Fail early with a clear message instead of an obscure "object not found"
# error in the next step when no input file is present.
if (length(mun_coalitions_files) == 0) {
  stop("No 'mun_coalitions' files found in ", here::here("data", "raw", "TSE"),
       call. = FALSE)
}

mun_coalitions_data <- map_df(mun_coalitions_files, read_rds)

# Harmonize party acronyms.
# composicao_coligacao: drop two-digit numbers, hyphens and a trailing
# whitespace character, then all spaces and a leading "/", and upper-case.
# sigla_partido: drop spaces and upper-case.
mun_coalitions_data <- mun_coalitions_data %>%
  mutate(
    composicao_coligacao = composicao_coligacao %>%
      str_remove_all("[0-9]{2}|-|\\s$") %>%
      str_replace_all(" ", "") %>%
      str_replace("^\\/", "") %>%
      str_to_upper(),
    sigla_partido = str_to_upper(str_replace_all(sigla_partido, " ", ""))
  )

# Inspect which election years are present
distinct(mun_coalitions_data, ano_eleicao)

# Exclude elections in non-election years, i.e. keep the four-year cycle
# 1996, 2000, ..., 2016
mun_coalitions_data_cleaned <- filter(
  mun_coalitions_data,
  ano_eleicao %in% seq.int(from = 1996, to = 2016, by = 4)
)

# Inspect descricao_eleicao
distinct(mun_coalitions_data_cleaned, descricao_eleicao)

# Inspect the distinct values of num_turno (there is more than one)
distinct(mun_coalitions_data_cleaned, num_turno)

# Check if supplementary elections are included in the original year.
#
# For each election year from 2000 to 2016, a row is irregular when its
# descricao_eleicao does not match that year's regular election label.
# The year -> label table replaces five copy-pasted per-year blocks, so a new
# cycle only needs one extra entry here.
regular_election_patterns <- c(
  "2000" = "^ELEICOES 2000$",
  "2004" = "^ELEICOES 2004$",
  "2008" = "^ELEICOES 2008$",
  "2012" = "^ELEICAO MUNICIPAL 2012$",
  "2016" = "^ELEICOES MUNICIPAIS 2016$"
)

# One row per (year, state, electoral unit, office, round) that had an
# irregular election, flagged with irregular = 1. The argument names are kept
# distinctive so they cannot be shadowed by a column of the data inside filter().
irregular <- map2(
  as.numeric(names(regular_election_patterns)),
  unname(regular_election_patterns),
  function(cycle_year, regular_label_regex) {
    mun_coalitions_data_cleaned %>%
      filter(!str_detect(descricao_eleicao, regular_label_regex),
             ano_eleicao == cycle_year) %>%
      distinct(ano_eleicao, sigla_uf, sigla_ue, descricao_cargo, num_turno) %>%
      mutate(irregular = 1)
  }
) %>%
  bind_rows()

# Attach the flag to every row sharing those keys; unmatched rows get NA
mun_coalitions_data_cleaned <- mun_coalitions_data_cleaned %>%
  left_join(irregular,
            by = c("ano_eleicao", "sigla_uf", "sigla_ue", "descricao_cargo", "num_turno"))

# Keep the rows flagged as irregular to check if they are duplicated (they are)
duplicate_irregular <- filter(mun_coalitions_data_cleaned, irregular == 1)

# Exclude irregular elections: only rows without the flag are kept
mun_coalitions_data_regular <- filter(
  mun_coalitions_data_cleaned,
  is.na(irregular)
)

# Check if all irregulars (supplementary and city names) were excluded (yes)
distinct(mun_coalitions_data_regular, descricao_eleicao)

# Inspect descricao_cargo
distinct(mun_coalitions_data_regular, descricao_cargo)

# Check number of cities (distinct sigla_ue) per year
mun_coalitions_data_regular %>%
  distinct(ano_eleicao, sigla_ue) %>%
  count(ano_eleicao, name = "n_obs")

# Create mayor and city-council coalition datasets (2000-2016 only), with
# lower-cased column names.
# NOTE(review): unlike the elections data, descricao_cargo is not upper-cased
# in this section, so rows spelled e.g. "Prefeito" would be dropped by these
# filters -- confirm against the descricao_cargo check above.
mayor_coalitions <- mun_coalitions_data_regular %>%
  filter(
    ano_eleicao %in% c(2000, 2004, 2008, 2012, 2016) & descricao_cargo == "PREFEITO"
  ) %>%
  rename_all(tolower)

citycouncil_coalitions <- mun_coalitions_data_regular %>%
  filter(
    ano_eleicao %in% c(2000, 2004, 2008, 2012, 2016) & descricao_cargo == "VEREADOR"
  ) %>%
  rename_all(tolower)

# Save both datasets under data/processed/elections
write_rds(
  mayor_coalitions,
  here::here("data", "processed", "elections", "mayor_coalitions_2000_2016.rds")
)

write_rds(
  citycouncil_coalitions,
  here::here("data", "processed", "elections", "citycouncil_coalitions_2000_2016.rds")
)
